use "$temp/nlsy_base_sample_geocode", clear

//parent income/wealth quantile groups (income: quartiles and deciles; house: quartiles)
//keep only observations where both parent_inc and parent_house are observed
keep if parent_inc != . & parent_house != .

//quantile cut points are computed using weight_1997 as frequency weights
xtile parent_inc_quart   = parent_income [fw = weight_1997], nquantiles(4)
xtile parent_inc_dec     = parent_income [fw = weight_1997], nquantiles(10)
xtile parent_house_quart = parent_house  [fw = weight_1997], nquantiles(4)

//reshape to long: one row per uniqid per year for every listed stub
reshape long hgc_ enroll_ hideg_ wage_ weight_ stfips_ coll_stfips_ coll_unitid_ grants_ loans_, i(uniqid) j(year)

******deflate stuff***********
//only years present in the deflator file are kept (keep(match))
merge m:1 year using "$data/GDP/gdp_pce_deflator", keep(match) nogen
replace deflator = deflator/100 //normalize
replace wage = wage/deflator //deflate labor income
replace grants = grants/deflator
replace loans = loans/deflator

****1997 deflator applied to parent information
//copy r(mean) into a local right away so the second replace does not depend on
//r() still holding the summarize results after the first replace has run
su deflator if year == 1997
local defl_1997 `r(mean)'
replace parent_income_1997 = parent_income/`defl_1997'
replace parent_house_value = parent_house_value/`defl_1997'

****student loans
//educ_loans is deflated with the deflator of the year the respondent turns 25
//(birthyr + 25), for years 2005 through 2009
forval y = 2005 / 2009{
	su deflator if year == `y'
	local defl_y `r(mean)'
	replace educ_loans = educ_loans / `defl_y' if birthyr == `y' - 25
}

****normalize a few things by 10k
//NOTE(review): the divisor was 40000 although this section is labelled "by 10k";
//it is set to 10000 to match the stated intent -- confirm against downstream code
foreach var of varlist parent_house_value parent_income_1997 educ_loans loans grants {
    replace `var' = `var' / 10000
}

//age in each survey year; year, sex and race are dropped once age exists
gen age = year - birthyr
drop year sex race
su wage, d
sort uniqid age

//college enrollment decision: first college observed between ages 18 and 25
replace coll_unitid = . if coll_unitid < 0 //negative codes are treated as missing
gen temp = coll_unitid if inrange(age, 18, 25)
//carry the earliest observed college forward within person (only up to age 25),
//so every non-missing value of temp for a person equals that first college
by uniqid (age): replace temp = temp[_n-1] if _n > 1 & temp[_n-1] != . & age <= 25
//spread the first-college id to every row of the person
by uniqid: egen unitid = max(temp)
merge m:1 unitid using "$temp/ipeds_categories", keep(1 3) nogen //get college category

//average loans/grants over the rows in which the person is at their first college
//NOTE(review): coll_unitid == unitid is also true when both are missing, so people
//without a first college average over all rows where coll_unitid is missing -- confirm intended
drop temp
foreach aid in loans grants {
    gen temp = `aid' if coll_unitid == unitid
    bys uniqid: egen `aid'_coll = mean(temp)
    drop temp
}

//per-year college identifiers and aid amounts are no longer needed
drop coll_unitid coll_stfips unitid grants_ loans_
drop deflator

//people with no matched college category get category 0
replace cat = 0 if cat == .
tab cat //gets similar non-attendance ratio to Shoya's JMP; good!

ren stfips coll_stfips
ren cat coll_cat

//final educational attainment
sort uniqid age
//fill in hgc: carry the last observed value forward within person
//(a single pass cascades through consecutive missing rows)
by uniqid (age): replace hgc_ = hgc_[_n-1] if _n > 1 & hgc_ == . & hgc_[_n-1] != .
//highest value of hgc observed at ages 27 through 29
gen temp = hgc if inrange(age, 27, 29)
by uniqid: egen hgc_final = max(temp)
drop temp

//compressed version: 3 if hgc_final >= 16, 2 if hgc_final is 14 or 15, 1 otherwise
//NOTE(review): a missing hgc_final and hgc_final == 13 both fall into category 1 -- confirm intended
gen hgc_final_comp = cond(hgc_final >= 16 & hgc_final != ., 3, ///
    cond(hgc_final == 14 | hgc_final == 15, 2, 1))

*****reshape back to wide and begin generating moments
//one row per uniqid; each stub below gets one column per age
local by_age_stubs wage_ hgc_ enroll_ hideg_ weight_ stfips_
reshape wide `by_age_stubs', i(uniqid) j(age)

//identifiers and person-level variables first, then the age-indexed series
order uniqid birthyr sample ability ///
    coll_cat grants_coll loans_coll coll_stfips ///
    parent_house* parent_inc* parent_net_worth educ_loan* ///
    stfips* hgc* enroll* hideg* wage* weight*

save "$temp/nlsy_moment_data", replace





//end of dofile